In [7]:
import pandas as pd
import numpy as np
import seaborn as sn
import matplotlib.pyplot as plt
import plotly.express as px
In [8]:
df=pd.read_csv("C:\\Users\\djo16\\Cognifyz\\Dataset .csv")
print(df.head())
Restaurant ID Restaurant Name Country Code City \
0 6317637 Le Petit Souffle 162 Makati City
1 6304287 Izakaya Kikufuji 162 Makati City
2 6300002 Heat - Edsa Shangri-La 162 Mandaluyong City
3 6318506 Ooma 162 Mandaluyong City
4 6314302 Sambo Kojin 162 Mandaluyong City
Address \
0 Third Floor, Century City Mall, Kalayaan Avenu...
1 Little Tokyo, 2277 Chino Roces Avenue, Legaspi...
2 Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...
3 Third Floor, Mega Fashion Hall, SM Megamall, O...
4 Third Floor, Mega Atrium, SM Megamall, Ortigas...
Locality \
0 Century City Mall, Poblacion, Makati City
1 Little Tokyo, Legaspi Village, Makati City
2 Edsa Shangri-La, Ortigas, Mandaluyong City
3 SM Megamall, Ortigas, Mandaluyong City
4 SM Megamall, Ortigas, Mandaluyong City
Locality Verbose Longitude Latitude \
0 Century City Mall, Poblacion, Makati City, Mak... 121.027535 14.565443
1 Little Tokyo, Legaspi Village, Makati City, Ma... 121.014101 14.553708
2 Edsa Shangri-La, Ortigas, Mandaluyong City, Ma... 121.056831 14.581404
3 SM Megamall, Ortigas, Mandaluyong City, Mandal... 121.056475 14.585318
4 SM Megamall, Ortigas, Mandaluyong City, Mandal... 121.057508 14.584450
Cuisines ... Currency Has Table booking \
0 French, Japanese, Desserts ... Botswana Pula(P) Yes
1 Japanese ... Botswana Pula(P) Yes
2 Seafood, Asian, Filipino, Indian ... Botswana Pula(P) Yes
3 Japanese, Sushi ... Botswana Pula(P) No
4 Japanese, Korean ... Botswana Pula(P) Yes
Has Online delivery Is delivering now Switch to order menu Price range \
0 No No No 3
1 No No No 3
2 No No No 4
3 No No No 4
4 No No No 4
Aggregate rating Rating color Rating text Votes
0 4.8 Dark Green Excellent 314
1 4.5 Dark Green Excellent 591
2 4.4 Green Very Good 270
3 4.9 Dark Green Excellent 365
4 4.8 Dark Green Excellent 229
[5 rows x 21 columns]
In [9]:
df.tail()
Out[9]:
| Restaurant ID | Restaurant Name | Country Code | City | Address | Locality | Locality Verbose | Longitude | Latitude | Cuisines | ... | Currency | Has Table booking | Has Online delivery | Is delivering now | Switch to order menu | Price range | Aggregate rating | Rating color | Rating text | Votes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 9546 | 5915730 | Naml۱ Gurme | 208 | ��stanbul | Kemanke�� Karamustafa Pa��a Mahallesi, R۱ht۱m ... | Karak�_y | Karak�_y, ��stanbul | 28.977392 | 41.022793 | Turkish | ... | Turkish Lira(TL) | No | No | No | No | 3 | 4.1 | Green | Very Good | 788 |
| 9547 | 5908749 | Ceviz A��ac۱ | 208 | ��stanbul | Ko��uyolu Mahallesi, Muhittin ��st�_nda�� Cadd... | Ko��uyolu | Ko��uyolu, ��stanbul | 29.041297 | 41.009847 | World Cuisine, Patisserie, Cafe | ... | Turkish Lira(TL) | No | No | No | No | 3 | 4.2 | Green | Very Good | 1034 |
| 9548 | 5915807 | Huqqa | 208 | ��stanbul | Kuru�_e��me Mahallesi, Muallim Naci Caddesi, N... | Kuru�_e��me | Kuru�_e��me, ��stanbul | 29.034640 | 41.055817 | Italian, World Cuisine | ... | Turkish Lira(TL) | No | No | No | No | 4 | 3.7 | Yellow | Good | 661 |
| 9549 | 5916112 | A���k Kahve | 208 | ��stanbul | Kuru�_e��me Mahallesi, Muallim Naci Caddesi, N... | Kuru�_e��me | Kuru�_e��me, ��stanbul | 29.036019 | 41.057979 | Restaurant Cafe | ... | Turkish Lira(TL) | No | No | No | No | 4 | 4.0 | Green | Very Good | 901 |
| 9550 | 5927402 | Walter's Coffee Roastery | 208 | ��stanbul | Cafea��a Mahallesi, Bademalt۱ Sokak, No 21/B, ... | Moda | Moda, ��stanbul | 29.026016 | 40.984776 | Cafe | ... | Turkish Lira(TL) | No | No | No | No | 2 | 4.0 | Green | Very Good | 591 |
5 rows × 21 columns
In [10]:
df.shape
Out[10]:
(9551, 21)
In [11]:
df.isnull()
Out[11]:
| Restaurant ID | Restaurant Name | Country Code | City | Address | Locality | Locality Verbose | Longitude | Latitude | Cuisines | ... | Currency | Has Table booking | Has Online delivery | Is delivering now | Switch to order menu | Price range | Aggregate rating | Rating color | Rating text | Votes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | False | False | False | False | False | False | False | False | False | False | ... | False | False | False | False | False | False | False | False | False | False |
| 1 | False | False | False | False | False | False | False | False | False | False | ... | False | False | False | False | False | False | False | False | False | False |
| 2 | False | False | False | False | False | False | False | False | False | False | ... | False | False | False | False | False | False | False | False | False | False |
| 3 | False | False | False | False | False | False | False | False | False | False | ... | False | False | False | False | False | False | False | False | False | False |
| 4 | False | False | False | False | False | False | False | False | False | False | ... | False | False | False | False | False | False | False | False | False | False |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 9546 | False | False | False | False | False | False | False | False | False | False | ... | False | False | False | False | False | False | False | False | False | False |
| 9547 | False | False | False | False | False | False | False | False | False | False | ... | False | False | False | False | False | False | False | False | False | False |
| 9548 | False | False | False | False | False | False | False | False | False | False | ... | False | False | False | False | False | False | False | False | False | False |
| 9549 | False | False | False | False | False | False | False | False | False | False | ... | False | False | False | False | False | False | False | False | False | False |
| 9550 | False | False | False | False | False | False | False | False | False | False | ... | False | False | False | False | False | False | False | False | False | False |
9551 rows × 21 columns
In [12]:
columns=df.columns.tolist()
print(columns)
['Restaurant ID', 'Restaurant Name', 'Country Code', 'City', 'Address', 'Locality', 'Locality Verbose', 'Longitude', 'Latitude', 'Cuisines', 'Average Cost for two', 'Currency', 'Has Table booking', 'Has Online delivery', 'Is delivering now', 'Switch to order menu', 'Price range', 'Aggregate rating', 'Rating color', 'Rating text', 'Votes']
In [13]:
df['Cuisines'] = df['Cuisines'].fillna('Unknown')
print(df['Cuisines'].value_counts())
Cuisines
North Indian 936
North Indian, Chinese 511
Chinese 354
Fast Food 354
North Indian, Mughlai 334
...
Bengali, Fast Food 1
North Indian, Rajasthani, Asian 1
Chinese, Thai, Malaysian, Indonesian 1
Bakery, Desserts, North Indian, Bengali, South Indian 1
Italian, World Cuisine 1
Name: count, Length: 1826, dtype: int64
In [14]:
df.isnull().sum()
Out[14]:
Restaurant ID 0 Restaurant Name 0 Country Code 0 City 0 Address 0 Locality 0 Locality Verbose 0 Longitude 0 Latitude 0 Cuisines 0 Average Cost for two 0 Currency 0 Has Table booking 0 Has Online delivery 0 Is delivering now 0 Switch to order menu 0 Price range 0 Aggregate rating 0 Rating color 0 Rating text 0 Votes 0 dtype: int64
LEVEL - 1¶
Task 1: Top Cuisines¶
- Determine the top three most common cuisines in the dataset.
In [18]:
cuisine_counts = df['Cuisines'].value_counts()
cuisine_counts
Out[18]:
Cuisines
North Indian 936
North Indian, Chinese 511
Chinese 354
Fast Food 354
North Indian, Mughlai 334
...
Bengali, Fast Food 1
North Indian, Rajasthani, Asian 1
Chinese, Thai, Malaysian, Indonesian 1
Bakery, Desserts, North Indian, Bengali, South Indian 1
Italian, World Cuisine 1
Name: count, Length: 1826, dtype: int64
In [20]:
cuisine_counts.head(5)
Out[20]:
Cuisines North Indian 936 North Indian, Chinese 511 Chinese 354 Fast Food 354 North Indian, Mughlai 334 Name: count, dtype: int64
In [26]:
print(df['Cuisines'].unique())
['French, Japanese, Desserts' 'Japanese' 'Seafood, Asian, Filipino, Indian' ... 'Burger, Izgara' 'World Cuisine, Patisserie, Cafe' 'Italian, World Cuisine']
In [32]:
top3=cuisine_counts.head(3)
top3
Out[32]:
Cuisines North Indian 936 North Indian, Chinese 511 Chinese 354 Name: count, dtype: int64
In [34]:
fig = px.bar(
x=top3.index,
y=top3.values,
title='Top Three Cuisines',
labels={'x': 'Cuisines', 'y': 'Number of Restaurants'},
color=top3.values,
color_continuous_scale=px.colors.sequential.Plasma
)
fig.show()
- Calculate the percentage of restaurants that serve each of the top cuisines.
In [36]:
total_r= len(df)
total_r
Out[36]:
9551
In [37]:
for Cuisines, i in top3.items():
percentage= round( (i / total_r * 100), 2)
print(f"{Cuisines}: {percentage}%")
North Indian: 9.8% North Indian, Chinese: 5.35% Chinese: 3.71%
In [38]:
results = pd.DataFrame({
'Cuisine Type': list(top3.keys()),
'Number of Restaurants': list(top3.values),
'Percentage': percentage
})
labels = results['Cuisine Type']
sizes = results['Percentage']
fig, ax = plt.subplots()
ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90, wedgeprops={'width':0.4})
ax.axis('equal')
plt.title('Top 3 Cuisines by % of Restaurants')
plt.show()
Task 2 : City Analysis¶
Identify the city with the highest numberof restaurants in the dataset.
In [41]:
city_counts= df['City'].value_counts()
print(city_counts)
city_counts.index[0]
City
New Delhi 5473
Gurgaon 1118
Noida 1080
Faridabad 251
Ghaziabad 25
...
Panchkula 1
Mc Millan 1
Mayfield 1
Macedon 1
Vineland Station 1
Name: count, Length: 141, dtype: int64
Out[41]:
'New Delhi'
Calculate the average rating for restaurants in each city.
In [48]:
avg= df.groupby('City')['Aggregate rating'].mean().reset_index()
print("*--------------Average Ratings city wise-----------*")
print(avg.round(2))
*--------------Average Ratings city wise-----------*
City Aggregate rating
0 Abu Dhabi 4.30
1 Agra 3.96
2 Ahmedabad 4.16
3 Albany 3.56
4 Allahabad 3.40
.. ... ...
136 Weirton 3.90
137 Wellington City 4.25
138 Winchester Bay 3.20
139 Yorkton 3.30
140 ��stanbul 4.29
[141 rows x 2 columns]
Determine the city with the highest average rating.
In [51]:
top_city = avg.sort_values(by='Aggregate rating', ascending=False).reset_index()
top_rated_city= avg.max().reset_index()
print(f"Top City: {top_city}, Rating: {top_rated_city}")
Top City: index City Aggregate rating 0 56 Inner City 4.900000 1 107 Quezon City 4.800000 2 73 Makati City 4.650000 3 95 Pasig City 4.633333 4 75 Mandaluyong City 4.625000 .. ... ... ... 136 88 New Delhi 2.438845 137 83 Montville 2.400000 138 78 Mc Millan 2.400000 139 89 Noida 2.036204 140 43 Faridabad 1.866932 [141 rows x 3 columns], Rating: index 0 0 City ��stanbul 1 Aggregate rating 4.9
In [53]:
print(type(top_rated_city))
<class 'pandas.core.frame.DataFrame'>
In [55]:
print("Top City:", top_city.iloc[0]['City'])
print("Top Rating:", top_city.iloc[0]['Aggregate rating'])
Top City: Inner City Top Rating: 4.9
In [ ]:
Task 3: Price Range Distribution¶
Create a histogram or bar chart tovisualize the distribution of price rangesamong the restaurants.
In [61]:
print(df['Price range'].unique())
[3 4 2 1]
In [65]:
price_counts = df['Price range'].value_counts().sort_index()
print(price_counts)
Price range 1 4444 2 3113 3 1408 4 586 Name: count, dtype: int64
In [81]:
price_percent = round((price_counts/ price_counts.sum()) * 100, 2)
price_data = pd.DataFrame({
'Price range': price_counts.index,
'Number of Restaurants': price_counts.values,
'Percentage': price_percent.values
})
print(price_data)
Price range Number of Restaurants Percentage 0 1 4444 46.53 1 2 3113 32.59 2 3 1408 14.74 3 4 586 6.14
In [83]:
fig = px.bar(
price_data,
x='Price range',
y='Number of Restaurants',
title='Distribution of Restaurants by Price range',
color_discrete_sequence=['skyblue']
)
fig.update_layout(
xaxis_title='Price range',
yaxis_title='Number of Restaurants',
plot_bgcolor='white',
yaxis=dict(showgrid=True, gridcolor='lightgray'),
)
fig.show()
In [ ]:
Calculate the percentage of restaurantsin each price range category.
In [154]:
fig = px.pie(price_data, names='Price range', values='Percentage', title='Percentage of Restaurants by Price Range')
fig.show()
In [ ]:
Task 4: Online Delivery¶
Determine the percentage of restaurants that offer online delivery.
In [91]:
print(df['Has Online delivery'].unique())
['No' 'Yes']
In [95]:
delivery_counts = df['Has Online delivery'].value_counts()
In [99]:
length=len(df[df['Has Online delivery']=='Yes'])
In [107]:
total=len(df)
total
Out[107]:
9551
In [146]:
delivery_percentage = round((length / total) * 100, 2)
In [148]:
print(delivery_percentage)
25.66
Compare the average ratings of restaurants with and without online delivery.
In [140]:
avg_ratings = df.groupby('Has Online delivery')['Aggregate rating'].mean()
In [142]:
print(avg_ratings)
Has Online delivery No 2.465296 Yes 3.248837 Name: Aggregate rating, dtype: float64
In [131]:
plt.figure(figsize=(6, 4))
avg_ratings.plot(kind='bar', color=['coral', 'lightblue'])
plt.title('Average Ratings: With vs Without Online Delivery')
plt.ylabel('Average Rating')
plt.xticks(rotation=0)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()
In [ ]: